import json
import numpy as np



def get_duplicates_list(your_list):
    """Return the records of ``your_list`` with duplicate prompts removed.

    Two records count as duplicates when the ``'value'`` of their first
    conversation turn whose ``'from'`` is ``'human'`` is equal. Only the
    first record seen for each such text is kept, and the original order
    is preserved. Records that contain no human turn all share the key
    ``None``, so only the first of them is kept.

    Each record is expected to be a mapping with a ``'conversations'``
    list of turn mappings carrying ``'from'`` (and, for human turns,
    ``'value'``) keys.
    """
    # Dicts keep insertion order, so the values come back in the order in
    # which each distinct prompt was first encountered.
    first_by_prompt = {}

    for record in your_list:
        prompt = None
        for turn in record['conversations']:
            if turn['from'] == 'human':
                prompt = turn['value']
                break
        # setdefault stores the record only if this prompt is new.
        first_by_prompt.setdefault(prompt, record)

    return list(first_by_prompt.values())





def merge_json_files(file1, file2, output_file):
    """Concatenate two JSON list files, de-duplicate, and write the result.

    Both input files must contain a JSON array; the arrays are joined with
    ``file1``'s records first, then passed through ``get_duplicates_list``.
    The record count is printed before and after de-duplication.

    Args:
        file1: Path of the first input JSON file.
        file2: Path of the second input JSON file.
        output_file: Path the merged JSON is written to (overwritten if it
            already exists).
    """
    # Read as UTF-8 explicitly: the platform default codec (e.g. cp936 or
    # cp1252 on Windows) would mis-decode or reject non-ASCII text.
    with open(file1, 'r', encoding='utf-8') as f1, \
            open(file2, 'r', encoding='utf-8') as f2:
        data1 = json.load(f1)
        data2 = json.load(f2)

    merged_data = data1 + data2
    print(len(merged_data))

    # De-duplicate based on the content of each record.
    merged_data = get_duplicates_list(merged_data)
    print(len(merged_data))

    # ensure_ascii=False emits raw non-ASCII characters, so the output
    # stream must be UTF-8 as well or the write can fail on some locales.
    with open(output_file, 'w', encoding='utf-8') as of:
        json.dump(merged_data, of, ensure_ascii=False, indent=4)

# Run the merge: combine the two input files and write the merged output.
merge_json_files(
    file1='train_data1225.json',
    file2='年龄问号挂机.json',
    output_file='train_data1226.json',
)



